## **Aim of the Script**
######### Transcriptional Factor analysis in neighborhood Zone (TFinZONE) ###############################################
##### This Script aims to analyze the DNA-Motifs associated with the chromatin States areas as scRNA-seq data. Instead of an expression of genes, we propose an enrichment score. The multiplication of the motif score on the Peak and the log10-Pvalue of the motif in the chromatin state of interest.
## Input files
##
## **Input files**
##### 08MOTIFS_on_PEAKS
- Folder containing all the output results from homer motifs of all Peaks files used in the program.
##### 08TSS_Enh_w_01dpci_fHeatmap_ALL.xlsx
- Matrix file, Containing the log10Pvalue from each motifs.
## **Output files**
#### 03OUTPUT
- Folder containing all the plots used in figure 3. As well as additional output from the Script
workdir = "./"
setwd(workdir)
PTHA="../03OUTPUT/"
dir.create(PTHA)
PROJECT="03a_TFinZONE_at_01dpci"
PORT=paste(PTHA,PROJECT,"/",sep="")
PTHA1=paste(PTHA,PROJECT,"/",sep="")
dir.create(PTHA)
dir.create(PORT)
dir.create(PTHA1)
TRY="_"
TRY1="02Ia_"
PVALUE=0.05
#### FOR VOLCANO ###
upcol<- "#B2182B" # magenta from PiyG
nc<- "#000000" # black
downcol<- "grey" # green from PiyG
CC= c(downcol, "#F7F7F7",upcol)
CO3<- c("lightgrey","#B2182B")
CO_ALU3=c("#d7191c","#d8b365","#542788","grey", "#91bfdb", "grey","grey")
NAME33<- c("Mo_Name","Consensus","P-value","Log_P-value","q-value_Benja","Nor_of_Tar","Per_of_Tar","No_of_Tar_Backg","Per_of_Tar_Backg")
NAME34<- c("HOmer_NAME",NAME33)
mat_m<- data.frame("mask_500_FIND")
## Here you write the Path to the
WORK1="../01DATA_ORI/08MOTIFS_on_PEAKS/"
#WORK1="Y:/002ZF/000FIGURES_ANALY/00GITHUB_01HIS_Chroma_Factors/01HIS_Chrom_Factors_TEST/04MOtifs_Analy_as_SC_MOAC_Fig3_5/02Fig4_MO_MOAC_01dpci/01DATA_ORI/08MOTIFS_on_PEAKS/"
NAME_G2 <-data.frame(list.files(path=WORK1,pattern="*_FIND"))
colnames(NAME_G2)<- "ENH4"
#NAME_G2$GROUP <- gsub("CORR1_01d_", "", NAME_G2$ENH4)
NAME_G2$GROUP <- gsub("*_mask_500_FIND", "", NAME_G2$ENH4)
### this is important to check the SYMBOL
NAME_G22<- data.frame(c("00A_Ia","01Ia_A","02U_Ea","03U_Ia","04U_Pa"))
NAME_G2c<- as.character(t(NAME_G2[,2]))
#WORK11A11="../01DATA_ORI/07aMO_forMOAC/08TSS_Enh_w_01dpci_fHeatmap_ALL.xlsx"
WORK11A11="../01DATA_ORI/07aMO_forSeurat_ANALY/08TSS_Enh_w_01dpci_fHeatmap_ALL.xlsx"
MAT_1A= read_excel(WORK11A11)
colnames(MAT_1A)[1]<- "SYMBOL"
dim(MAT_1A)
## [1] 326 11
##### Set resolution for CLustering
RES=0.5
## sequential:
## - args: function (..., envir = parent.frame())
## - tweaked: FALSE
## - call: NULL
## sequential:
## - args: function (..., workers = 16, envir = parent.frame())
## - tweaked: TRUE
## - call: plan("sequential", workers = 16)
##
## 00A_Ia_01dcpi 01Ia_A_01dcpi 02U_Ea_01dcpi 03U_Ia_01dcpi 04U_Pa_01dcpi
## 278786 117 6814 66147 12513
## [1] 3
b7<- AC_RE_VOL_SC_META
rownames(b7)<- b7$PositionID2
mat_c1<- mat_cm_TMM_BOTH_SC[,rownames(b7)]
rownames(mat_c1)<- mat_cm_TMM_BOTH_SC$SYMBOL
dim(mat_c1)
## [1] 207 3090
dim(b7)
## [1] 3090 5
smartseq2 <- CreateSeuratObject(mat_c1,project = "01d", meta.data = b7)
## png
## 2
arrangeQC_TF <- ggarrange(EC00,EC001, ncol = 2,nrow =1,common.legend = T, align = c("hv"),legend="none")
print(arrangeQC_TF)
grid.arrange( plot1,plot2, ncol = 1,nrow = 2)
## Centering and scaling data matrix
## PC_ 1
## Positive: THRb, Hoxd12, Tbx5, Smad3, PR, Hoxd11, Eomes, Sox3, Olig2, Sox21
## Hoxa9, Hoxa11, Sox10, Hoxd10, Twist2, Smad2, GATA3, NeuroG2, Erra, TRPS1
## Nanog, CEBP, ERG, Smad4, EAR2, ETS1, Pdx1, BHLHA15, CDX4, PBX2
## Negative: Isl1, bHLHE41, Npas4, Gfi1b, Tbet, Tbx21, Otx2, CHR, Hoxa10, ZNF7
## Elk1, HNF6, Cux2, Unknown, PRDM10, SCL, IRF3, Oct6, Max, ELF1
## MYNN, HINFP, Tbox, CRE, STAT1, GATA, E2F3, TCFL2, JunD, NRF
## PC_ 2
## Positive: MNT, BMAL1, NPAS2, RUNX2, RUNX, TEAD, NPAS, MITF, Twist2, CLOCK
## RUNX1, CEBP, TEAD3, THRb, HIC1, Zic, PBX2, Atf3, Eomes, Hoxc9
## Sox3, TEAD2, Olig2, USF1, Usf2, Erra, CDX4, BHLHA15, SF1, Fos
## Negative: ETV4, Fli1, ETV1, GABPA, Elf4, Etv2, ERG, ELF3, EWS, EHF
## ETS1, ELF5, SPDEF, Elk4, Elk1, ETS, ELF1, SpiB, SCL, Npas4
## Meis1, bHLHE41, Isl1, FoxL2, Foxo1, Tbet, IRF8, Tbx21, GSC, Tgif1
## PC_ 3
## Positive: Isl1, Nanog, Tbet, Tbx21, Pitx1, Bapx1, Tbr1, Sox2, RARa, Etv2
## TRPS1, Hoxa10, Bcl6, Otx2, Gata2, Sox6, Gata1, EWS, Gfi1b, CHR
## ZNF7, Hoxa9, Gata4, Sox15, ELF3, ERG, Gata6, SCL, GATA3, ETS1
## Negative: Foxo3, FoxL2, Foxf1, MYB, FOXK1, AMYB, Npas4, Foxo1, HNF6, FOXP1
## Hoxd13, Foxa3, FOXA1, Cux2, Unknown, FOXK2, Elk4, ELF1, Cdx2, Elk1
## Foxa2, Oct6, BMYB, ETS, CRE, MYNN, CDX4, Meis1, Hoxa11, FOXM1
## PC_ 4
## Positive: BATF, Atf3, Fos, Fra1, JunB, Fra2, Fosl2, ETV1, ETV4, HIC1
## GABPA, Elf4, MYB, ETS1, Etv2, Max, ERG, Fli1, Elk4, RUNX2
## OCT, Pdx1, Hoxc9, Smad4, ETS, ELF3, ELF1, E2F3, PBX2, EHF
## Negative: FOXM1, FOXA1, Foxf1, FOXP1, Pitx1, Fox, FoxL2, FOXK2, SCL, Foxa2
## FOXK1, Foxo3, Isl1, GSC, Foxo1, Tbet, Tbx21, FoxD3, Otx2, Foxa3
## Nanog, Gata2, Bcl6, ZNF7, Gata1, TRPS1, Hoxa10, Hoxa13, Tbr1, CHR
## PC_ 5
## Positive: Hoxd13, CDX4, Hoxd11, Sox3, Cdx2, Hoxd12, MYB, Sox17, Hoxa11, ZNF711
## Smad3, Sox7, AMYB, Hoxd10, Elk4, Elf4, PAX5, HIF2a, Tbx5, Sox21
## Smad2, Smad4, Olig2, Sox10, Erra, NFIL3, THRb, RXR, PGR, Unknown
## Negative: Fra1, Fra2, Atf3, JunB, BATF, Fos, Fosl2, Fox, FOXP1, Foxa2
## FOXA1, FOXM1, Foxf1, Foxo3, FoxL2, FOXK2, FOXK1, Foxa3, Isl1, Pitx1
## Tbet, SCL, Tbx21, FoxD3, Otx2, GSC, STAT4, ZNF7, Bcl6, Foxo1
grid.arrange( P1, ncol = 1,nrow = 1)
print(P2)
## png
## 2
#grid.arrange( P3, ncol = 1,nrow = 1)
set.seed(1)
smartseq2 <- FindNeighbors(smartseq2, dims = 1:10)
smartseq2 <- FindClusters(smartseq2, resolution = RES, random.seed= 1, algorithm=1)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3090
## Number of edges: 94891
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8747
## Number of communities: 11
## Elapsed time: 0 seconds
#########here
## png
## 2
## png
## 2
print(arrange1t)
print(arrange1)
print(arrange1bt)
print(arrange1c)
print(arrange1cat)
## png
## 2
print(arrange3)
print(arrange3t)
print(arrange2)
grid.arrange( P20,P201, ncol = 1,nrow = 2)
grid.arrange( P05ah_bino1, ncol = 1,nrow = 1)
grid.arrange( P05ah_bino, ncol = 1,nrow = 1)
DATA_SC<- data.frame(smartseq2@assays[["RNA"]]@counts@i)
DATA_SC<- data.frame(smartseq2@assays[["RNA"]]@counts@i)
DATA_SC<- GetAssay(smartseq2,assay = "RNA")
DATA_SC1<- data.frame(DATA_SC@scale.data)
DATA_SC2<- data.frame(DATA_SC@scale.data)
#dim(DATA_SC1)
META_SEU<- data.frame(smartseq2@meta.data)
colnames(DATA_SC1)<- META_SEU$PositionID
colnames(META_SEU)[4]<- "SYMBOL_TF"
GENES001<- c("Twist2","Smad4", "Nanog","Smad2","Fos","Sox9","Sox10","JunB")
GENES002 <- c("Pitx1","Isl1", "Mef2b","FOXA1","TEAD3","GATA3", "Gata6", "Mef2b")
GENES003 <- c("Eomes","Oct4","Foxh1","CUX1","Bapx1", "Elk4","Foxo1","Fli1")
GENES004<- c("Rbp7","Rgcc", "Fabp4","Egfl7","Flt1", "Cd36","Nrp1","Tpm1", "Rbp1")
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
## png
## 2
## png
## 2
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
## png
## 2
## png
## 2
print(arrange12)
## png
## 2
## png
## 2
print(P05a)
print(P05b)
print(P05c)
print(P05a1)
print(P05b1)
print(P05c1)
#grid.arrange( P04a1v, ncol = 1,nrow = 1)
#grid.arrange( P04b1v, ncol = 1,nrow = 1)
#grid.arrange( P04c1v, ncol = 1,nrow = 1)
## png
## 2
## png
## 2
print(arrange1CM41)
print(arrange1CM42)
print(arrange1CM43)